package com.moduls.data.common;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;

/**
 * Crawler for yellow-pages company information on b2b.11467.com.
 *
 * <p>Walks the site top-down: home page, first-level link, second-level category links,
 * paginated company listings, and finally the individual company pages. Every page is fetched
 * through {@code PsUtil.getElement}. Company pages are currently only downloaded; nothing is
 * extracted from them yet.
 */
public class Jsp {
    /** Entry page of the crawl. */
    private static final String HOME_URL = "http://b2b.11467.com";
    /** Id of the element that wraps the content on every page this crawler reads. */
    private static final String CONTAINER_ID = "il";
    /** Position, among the container's {@code <div>} matches on the home page, of the first-level link block. */
    private static final int FIRST_LEVEL_DIV_INDEX = 16;
    /** Position, among the container's {@code <div>} matches on a first-level page, of the category block. */
    private static final int CATEGORY_DIV_INDEX = 1;
    /** Bounds (inclusive, milliseconds) of the random pause taken after fetching a listing page. */
    private static final int MIN_DELAY_MS = 100;
    private static final int MAX_DELAY_MS = 200;

    /**
     * Runs the crawl starting from the home page.
     *
     * <p>Only the first link of the first-level block is followed; the remaining links are
     * ignored, exactly as before. Any failure stops the crawl and is reported on stderr instead
     * of being silently discarded.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document home = PsUtil.getElement(HOME_URL);
            Element linkBlock = nthDiv(container(home, HOME_URL), FIRST_LEVEL_DIV_INDEX, HOME_URL);
            Element firstLink = linkBlock.getElementsByTag("a").first();
            if (firstLink == null) {
                System.err.println("No first-level links found on " + HOME_URL);
                return;
            }
            String firstHref = firstLink.attr("href");
            System.out.println(firstHref);
            if (firstHref.isEmpty()) {
                System.err.println("First link on " + HOME_URL + " has no href; nothing to crawl");
                return;
            }
            crawlFirstLevel(firstHref);
        } catch (InterruptedException e) {
            // Restore the flag so whoever owns this thread can still observe the interruption.
            Thread.currentThread().interrupt();
            System.err.println("Crawl interrupted");
        } catch (Exception e) {
            System.err.println("Crawl aborted: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Fetches one page of a company listing and then downloads each company page it links to.
     *
     * <p>After fetching the listing the method sleeps for a random 100-200 ms before visiting
     * the company links.
     *
     * @param href link to the listing page; either an absolute {@code http(s)://} URL or a
     *             value that becomes one when prefixed with {@code http:}
     * @throws IllegalArgumentException if {@code href} is {@code null} or empty
     * @throws Exception if fetching a page fails or the thread is interrupted while sleeping
     */
    public static void getAllCompany(String href) throws Exception {
        if (href == null || href.isEmpty()) {
            throw new IllegalArgumentException("href must not be null or empty");
        }
        String listingUrl = toAbsoluteUrl(href);
        Document listing = PsUtil.getElement(listingUrl);
        if (listing == null) {
            throw new IllegalStateException("No document returned for " + listingUrl);
        }
        Thread.sleep(randomDelayMillis());
        for (Element entry : listing.getElementsByClass("companylist")) {
            // Link to a company on the current page.
            // NOTE(review): attr() on the anchor set yields only the first anchor's href, so if one
            // "companylist" element wraps several companies only the first is visited - confirm
            // against the site's markup.
            String companyHref = entry.getElementsByTag("a").attr("href");
            if (companyHref.isEmpty()) {
                continue;
            }
            // TODO: the company page is downloaded but nothing is extracted from it yet.
            PsUtil.getElement(toAbsoluteUrl(companyHref));
        }
    }

    /** Second level: follows every category link found in the {@code <dt>} entries of a first-level page. */
    private static void crawlFirstLevel(String firstLevelHref) throws Exception {
        String pageUrl = toAbsoluteUrl(firstLevelHref);
        Document page = PsUtil.getElement(pageUrl);
        Element categoryBlock = nthDiv(container(page, pageUrl), CATEGORY_DIV_INDEX, pageUrl);
        for (Element term : categoryBlock.getElementsByTag("dt")) {
            String categoryHref = term.getElementsByTag("a").attr("href");
            if (categoryHref.isEmpty()) {
                continue;
            }
            crawlCategory(categoryHref);
        }
    }

    /** Company list page: hands every pagination link found in the "pages" elements to {@link #getAllCompany}. */
    private static void crawlCategory(String categoryHref) throws Exception {
        String categoryUrl = toAbsoluteUrl(categoryHref);
        Document listing = PsUtil.getElement(categoryUrl);
        for (Element pager : container(listing, categoryUrl).getElementsByClass("pages")) {
            for (Element pageLink : pager.getElementsByTag("a")) {
                String pageHref = pageLink.attr("href");
                if (!pageHref.isEmpty()) {
                    getAllCompany(pageHref);
                }
            }
        }
    }

    /** Returns the content container of {@code page}, failing with a descriptive message when it is absent. */
    private static Element container(Document page, String url) {
        if (page == null) {
            throw new IllegalStateException("No document returned for " + url);
        }
        Element wrapper = page.getElementById(CONTAINER_ID);
        if (wrapper == null) {
            throw new IllegalStateException("Element #" + CONTAINER_ID + " not found on " + url);
        }
        return wrapper;
    }

    /** Returns the {@code index}-th {@code <div>} match under {@code wrapper}, failing clearly when there are too few. */
    private static Element nthDiv(Element wrapper, int index, String url) {
        Elements divs = wrapper.getElementsByTag("div");
        if (index >= divs.size()) {
            throw new IllegalStateException("Expected at least " + (index + 1) + " <div> matches under #"
                    + CONTAINER_ID + " on " + url + " but found " + divs.size());
        }
        return divs.get(index);
    }

    /**
     * Turns a link taken from a page into a fetchable URL. Links that already carry an
     * {@code http://} or {@code https://} scheme are returned untouched; anything else gets the
     * {@code http:} prefix the crawler has always applied (presumably the site emits
     * protocol-relative {@code //host/path} links - verify against the live markup).
     */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        return "http:" + href;
    }

    /** Picks a pause length between {@link #MIN_DELAY_MS} and {@link #MAX_DELAY_MS}, inclusive. */
    private static int randomDelayMillis() {
        return (int) (MIN_DELAY_MS + Math.random() * (MAX_DELAY_MS - MIN_DELAY_MS + 1));
    }
}
